* Session setup for this do-file.
* Close a log file if one is open; -capture- suppresses the error when none is.
capture log close
* Start from an empty session (data and other stored objects are cleared).
clear all
* Turn off the --more-- pause; -perm- makes the setting persist across sessions.
set more off, perm
* Fix the random-number seed.
* NOTE(review): no command in this section visibly draws random numbers -- confirm whether the seed is still needed.
set seed 1234

///////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////// Table O.2: Priority Subjects by Major ///////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////////////

** Step 1: start from the dictionary linking each career name to its code
use "Work Data/Dictionary career code.dta", clear

** Step 2: attach the major-level information (2001-2004 appendix file)
merge 1:1 career_choice using "Work Data/career_vars2001to2004_appendix.dta"
* Show dictionary entries with no match in the appendix file
* (per the original note: majors offered after 2004)
list if _merge == 1
* Retain matched majors only, then discard the merge indicator
drop if _merge != 3
drop _merge

** Step 3: attach the priority-discipline indicators, again keeping matches only
merge 1:1 career_choice using "Work Data/priority_discipline.dta"
drop if _merge != 3
drop _merge

* Flag the majors that are left out of the main sample, identified by career code.
gen exclude_majors_mainsample = inlist(career_choice, 23, 25, 26, 48, 86, 87, 90, 91, 92, 93, 100)

* Report how many majors are flagged and which ones they are, then remove them.
tabulate exclude_majors_mainsample
tabulate Career_name if exclude_majors_mainsample == 1
keep if !exclude_majors_mainsample

* Restrict the data to identifiers, priority-subject indicators and the
* applicant / female-share variables used below.
keep Curso Career_name Career_code_st career_choice ///
    prior_port prior_biol prior_chem prior_hist prior_phy prior_geog prior_math ///
    cutoff share_female_beforeaa sh_overall_fem_p2 sh_female_p2 ///
    total_applicants sample_applicants sh_overall_sample_fem sh_female_mainsample

* Each major's share of all sample applicants.
egen tot_num_sample = total(sample_applicants)
gen major_prop_sample = sample_applicants / tot_num_sample

* Turn the priority indicators into table cells:
* "1" becomes a LaTeX check mark and "0" becomes an empty cell.
tostring prior_*, replace
foreach subject of varlist prior_* {
    replace `subject' = "\checkmark" if `subject' == "1"
    replace `subject' = "" if `subject' == "0"
}

* Order rows by descending cutoff (via the negated copy), then by female share.
gen cutoff2 = -cutoff
sort cutoff2 sh_female_mainsample

* Round for presentation: cutoff to the nearest integer, proportion to 3 decimals.
replace cutoff = round(cutoff)
replace major_prop_sample = round(major_prop_sample, 0.001)

* Give the priority-subject columns their full subject names (used as headers on export).
rename (prior_port prior_biol prior_chem prior_hist prior_geog prior_math prior_phy) ///
       (Portuguese Biology Chemistry History Geography Mathematics Physics)

* Build the displayed major label: the career name, followed by "(Eve.)"
* when Curso matches the pattern "(Noturno)".
gen when = cond(regexm(Curso, "(Noturno)"), "(Eve.)", "")
egen Major = concat(Career_name when), punct(" ")

** Append one extra observation that holds the table's "Overall" summary line.
local new = _N + 1
set obs `new'
* Address the appended row by its computed position instead of a hard-coded
* observation number, so the label lands on the new row however many majors
* remain after the exclusions above.
replace Major = "Overall" in `new'

* Overall sample proportion = sum of the per-major proportions (which were
* rounded to 3 decimals earlier, so the total may differ slightly from 1).
* The fill is scoped to the appended row so that a real major with a missing
* proportion is never silently given the overall figure.
egen test_sum_sample = total(major_prop_sample)
replace major_prop_sample = test_sum_sample in `new'

* Diagnostic: the appended row is missing at this point, so a count of 1 is
* expected; a larger count means some major has a missing female share.
count if sh_female_mainsample==.

* Overall female share in the sample, written to the appended row only.
egen share_sample_all = max(sh_overall_sample_fem)
replace sh_female_mainsample = share_sample_all in `new'
replace sh_female_mainsample = round(sh_female_mainsample, 0.001)

*** Final table: keep only the displayed columns, in display order, and export

* The same column list drives both the selection and the ordering.
local table_cols Curso Career_code_st Major Portuguese Biology Chemistry History ///
    Geography Mathematics Physics cutoff sh_female_mainsample major_prop_sample
keep `table_cols'
order `table_cols'

* Write the table to Excel with variable names in the first row, overwriting any existing file.
export excel using "Output/Descriptive_Majors.xlsx", firstrow(variables) replace
